import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display, HTML
from matplotlib.ticker import FuncFormatter
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from scipy.cluster.hierarchy import dendrogram, linkage
#loading the dataset given
df1=pd.read_csv('data_1.csv')
df3=pd.read_csv('data_3.csv')
df4=pd.read_csv('data_4.csv')
df2 = pd.read_csv("data_2.txt")
# df3=pd.read_('data_3.txt')
#checking the given data
data=['df1','df2','df3','df4']
colm={}
for i,col in enumerate([df1,df2,df3,df4]):
colm[data[i]]=set(col.columns)
colm
{'df1': {'Account Description',
'Creditor',
'JV Date',
'JV Reference',
'JV Value',
'Service Area',
'Transaction Date'},
'df2': {'Account Description',
'Creditor',
'Journal Date',
'Journal Reference',
'Service Area',
'Total'},
'df3': {'Account Description',
'Creditor',
'Journal Date',
'Journal Reference',
'Service Area',
'Total'},
'df4': {'FIN.INET CONVERSION',
'FIN.ORIGINAL CURRENCY AMOUNT',
'FIN.ORIGINAL ISO CURRENCY CODE SYMBOL',
'FIN.POSTING DATE',
'FIN.TRANSACTION AMOUNT',
'FIN.TRANSACTION DATE',
'MCH.CITY NAME',
'MCH.MERCHANT NAME'}}
#since we can remove the JV Date from the first data
df1.drop(columns=['JV Date'],inplace=True)
df1.head()
| Service Area | Account Description | Creditor | Transaction Date | JV Reference | JV Value | |
|---|---|---|---|---|---|---|
| 0 | Childrens Services | IT Services | 123-REG.CO.UK | 23/04/2014 | 93 | 143.81 |
| 1 | Childrens Services | Other Services | ACCESS EXPEDITIONS | 03/04/2014 | 111 | 6,000.00 |
| 2 | Childrens Services | Equipment and Materials Repair | AFE SERVICELINE | 02/04/2014 | 6 | 309.38 |
| 3 | Childrens Services | Equipment and Materials Repair | AFE SERVICELINE | 02/04/2014 | 7 | 218.76 |
| 4 | Childrens Services | Building Repairs & Maintenance | ALLSOP & FRANCIS | 15/04/2014 | 381 | 306 |
df2.head()
| Service Area | Account Description | Creditor | Journal Date | Journal Reference | Total | |
|---|---|---|---|---|---|---|
| 0 | Assurance | Miscellaneous Expenses | 43033820 COSTA COFFEE | 18/08/2015 | 5043.0 | 2 |
| 1 | Children's Family Services | Miscellaneous Expenses | 99 PLUS DISCOUNT MART | 08/06/2015 | 4184.0 | 29.97 |
| 2 | Children's Family Services | E19 - Learning Resources | 99P STORES LTD | 07/12/2015 | 6278.0 | 34.65 |
| 3 | Children's Family Services | Equipment and Materials Purcha | 99P STORES LTD | 18/08/2015 | 5041.0 | 10.72 |
| 4 | Children's Family Services | Subsistence | CHOPSTIX00000000000 | 21/05/2015 | 5750.0 | 33.7 |
column_names=['Service Area','Account Description',
'Creditor',
'Transaction Date',
'JV Reference',
'JV Value' ]
for i,col in enumerate([df1,df2,df3]):
col.set_axis(column_names,axis=1,inplace=True)
data=['df1','df2','df3']
colm={}
for i,col in enumerate([df1,df2,df3]):
colm[data[i]]=set(col.columns)
set.intersection(*colm.values())#all the columns are common here
{'Account Description',
'Creditor',
'JV Reference',
'JV Value',
'Service Area',
'Transaction Date'}
df1.head()
| Service Area | Account Description | Creditor | Transaction Date | JV Reference | JV Value | |
|---|---|---|---|---|---|---|
| 0 | Childrens Services | IT Services | 123-REG.CO.UK | 23/04/2014 | 93 | 143.81 |
| 1 | Childrens Services | Other Services | ACCESS EXPEDITIONS | 03/04/2014 | 111 | 6,000.00 |
| 2 | Childrens Services | Equipment and Materials Repair | AFE SERVICELINE | 02/04/2014 | 6 | 309.38 |
| 3 | Childrens Services | Equipment and Materials Repair | AFE SERVICELINE | 02/04/2014 | 7 | 218.76 |
| 4 | Childrens Services | Building Repairs & Maintenance | ALLSOP & FRANCIS | 15/04/2014 | 381 | 306 |
df2.head()
| Service Area | Account Description | Creditor | Transaction Date | JV Reference | JV Value | |
|---|---|---|---|---|---|---|
| 0 | Assurance | Miscellaneous Expenses | 43033820 COSTA COFFEE | 18/08/2015 | 5043.0 | 2 |
| 1 | Children's Family Services | Miscellaneous Expenses | 99 PLUS DISCOUNT MART | 08/06/2015 | 4184.0 | 29.97 |
| 2 | Children's Family Services | E19 - Learning Resources | 99P STORES LTD | 07/12/2015 | 6278.0 | 34.65 |
| 3 | Children's Family Services | Equipment and Materials Purcha | 99P STORES LTD | 18/08/2015 | 5041.0 | 10.72 |
| 4 | Children's Family Services | Subsistence | CHOPSTIX00000000000 | 21/05/2015 | 5750.0 | 33.7 |
df=pd.concat([df1,df2,df3],ignore_index=True)
df.head()
| Service Area | Account Description | Creditor | Transaction Date | JV Reference | JV Value | |
|---|---|---|---|---|---|---|
| 0 | Childrens Services | IT Services | 123-REG.CO.UK | 23/04/2014 | 93.0 | 143.81 |
| 1 | Childrens Services | Other Services | ACCESS EXPEDITIONS | 03/04/2014 | 111.0 | 6,000.00 |
| 2 | Childrens Services | Equipment and Materials Repair | AFE SERVICELINE | 02/04/2014 | 6.0 | 309.38 |
| 3 | Childrens Services | Equipment and Materials Repair | AFE SERVICELINE | 02/04/2014 | 7.0 | 218.76 |
| 4 | Childrens Services | Building Repairs & Maintenance | ALLSOP & FRANCIS | 15/04/2014 | 381.0 | 306 |
df.shape[0]==sum([df1.shape[0],df2.shape[0],df3.shape[0]])
True
#getting the details of the combined data set
def get_data_info(data):
print("shape of the dataframe: ",data.shape,"\n","="*50)
print("size of the data: ",data.size,"\n","="*50)
print("duplicate values present: ",data.duplicated().sum(),"\n","="*50)
print("missing values %: \n",data.isnull().mean()*100,"\n","="*50)
print("unique values present in the data: \n",data.nunique(),"\n","="*50)
get_data_info(df)
shape of the dataframe: (12589, 6) ================================================== size of the data: 75534 ================================================== duplicate values present: 0 ================================================== missing values %: Service Area 0.007943 Account Description 0.015887 Creditor 0.015887 Transaction Date 0.015887 JV Reference 0.015887 JV Value 0.000000 dtype: float64 ================================================== unique values present in the data: Service Area 25 Account Description 67 Creditor 1936 Transaction Date 739 JV Reference 10742 JV Value 6292 dtype: int64 ==================================================
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 12589 entries, 0 to 12588 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Service Area 12588 non-null object 1 Account Description 12587 non-null object 2 Creditor 12587 non-null object 3 Transaction Date 12587 non-null object 4 JV Reference 12587 non-null float64 5 JV Value 12589 non-null object dtypes: float64(1), object(5) memory usage: 590.2+ KB
#the values of jv value is in object/string type
df['JV Value']=df['JV Value'].apply(lambda x:(''.join(re.findall(r"\d+",x)) ))
df['JV Value']=df['JV Value'].astype(float)
df['JV Value'].dtype #mission success
dtype('float64')
df['Transaction Date']=pd.to_datetime(df['Transaction Date'])
df['date']=df['Transaction Date'].dt.day
df['month']=df['Transaction Date'].dt.month
df['year']=df['Transaction Date'].dt.year
df['QT']=df['Transaction Date'].dt.quarter
df['QT_validation']=df['Transaction Date'].dt.to_period('Q')
df['final_qr'] = pd.PeriodIndex(df['Transaction Date'], freq='Q')
df['Quarter'] = df['QT'].map(lambda x: "Q{}".format(x)).str[:-2]
df.head()
| Service Area | Account Description | Creditor | Transaction Date | JV Reference | JV Value | date | month | year | QT | QT_validation | final_qr | Quarter | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Childrens Services | IT Services | 123-REG.CO.UK | 2014-04-23 | 93.0 | 14381.0 | 23.0 | 4.0 | 2014.0 | 2.0 | 2014Q2 | 2014Q2 | Q2 |
| 1 | Childrens Services | Other Services | ACCESS EXPEDITIONS | 2014-03-04 | 111.0 | 600000.0 | 4.0 | 3.0 | 2014.0 | 1.0 | 2014Q1 | 2014Q1 | Q1 |
| 2 | Childrens Services | Equipment and Materials Repair | AFE SERVICELINE | 2014-02-04 | 6.0 | 30938.0 | 4.0 | 2.0 | 2014.0 | 1.0 | 2014Q1 | 2014Q1 | Q1 |
| 3 | Childrens Services | Equipment and Materials Repair | AFE SERVICELINE | 2014-02-04 | 7.0 | 21876.0 | 4.0 | 2.0 | 2014.0 | 1.0 | 2014Q1 | 2014Q1 | Q1 |
| 4 | Childrens Services | Building Repairs & Maintenance | ALLSOP & FRANCIS | 2014-04-15 | 381.0 | 306.0 | 15.0 | 4.0 | 2014.0 | 2.0 | 2014Q2 | 2014Q2 | Q2 |
df.Quarter.value_counts()
Q1 3272 Q3 3216 Q4 3168 Q2 2931 Qn 2 Name: Quarter, dtype: int64
get_data_info(df)
shape of the dataframe: (12589, 13) ================================================== size of the data: 163657 ================================================== duplicate values present: 0 ================================================== missing values %: Service Area 0.007943 Account Description 0.015887 Creditor 0.015887 Transaction Date 0.015887 JV Reference 0.015887 JV Value 0.000000 date 0.015887 month 0.015887 year 0.015887 QT 0.015887 QT_validation 0.015887 final_qr 0.015887 Quarter 0.000000 dtype: float64 ================================================== unique values present in the data: Service Area 25 Account Description 67 Creditor 1936 Transaction Date 739 JV Reference 10742 JV Value 5353 date 31 month 12 year 4 QT 4 QT_validation 16 final_qr 16 Quarter 5 dtype: int64 ==================================================
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 12589 entries, 0 to 12588 Data columns (total 13 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Service Area 12588 non-null object 1 Account Description 12587 non-null object 2 Creditor 12587 non-null object 3 Transaction Date 12587 non-null datetime64[ns] 4 JV Reference 12587 non-null float64 5 JV Value 12589 non-null float64 6 date 12587 non-null float64 7 month 12587 non-null float64 8 year 12587 non-null float64 9 QT 12587 non-null float64 10 QT_validation 12587 non-null period[Q-DEC] 11 final_qr 12587 non-null period[Q-DEC] 12 Quarter 12589 non-null object dtypes: datetime64[ns](1), float64(6), object(4), period[Q-DEC](2) memory usage: 1.2+ MB
df.describe().T
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| JV Reference | 12587.0 | 6126.846906 | 3248.967457 | 1.0 | 3943.5 | 5795.0 | 8847.5 | 12136.0 |
| JV Value | 12589.0 | 14626.548495 | 541065.815394 | 0.0 | 298.0 | 1600.0 | 5825.0 | 47104401.0 |
| date | 12587.0 | 15.853976 | 8.456099 | 1.0 | 9.0 | 16.0 | 23.0 | 31.0 |
| month | 12587.0 | 6.467625 | 3.489941 | 1.0 | 3.0 | 7.0 | 10.0 | 12.0 |
| year | 12587.0 | 2015.268610 | 0.950082 | 2014.0 | 2014.0 | 2015.0 | 2016.0 | 2017.0 |
| QT | 12587.0 | 2.498927 | 1.128441 | 1.0 | 1.0 | 3.0 | 4.0 | 4.0 |
df['QT_validation'].value_counts()
2016Q3 1098 2016Q1 1091 2015Q3 1087 2016Q4 1072 2014Q4 1057 2016Q2 1006 2015Q4 974 2014Q2 933 2014Q3 923 2017Q1 919 2015Q1 917 2015Q2 898 2014Q1 345 2017Q3 108 2017Q2 94 2017Q4 65 Freq: Q-DEC, Name: QT_validation, dtype: int64
df['QT_validation'].unique()
<PeriodArray> ['2014Q2', '2014Q1', '2014Q4', '2014Q3', '2015Q1', '2015Q3', '2015Q2', '2015Q4', '2016Q1', '2016Q3', '2016Q4', '2016Q2', 'NaT', '2017Q1', '2017Q4', '2017Q2', '2017Q3'] Length: 17, dtype: period[Q-DEC]
#seperate the plot granular by passing the values clearly
sns.pairplot(df,hue='Service Area')
plt.show()
#understanding the service area presentin the data
display(df['Service Area'].unique())
print("total_unique service area",df['Service Area'].nunique())
display(HTML("<hr>"))
display(df['Service Area'].value_counts())
temp = df.groupby('Service Area')
for i in temp:
display(i)
def summary_table(data,group,value):
"""
arg: data--> dataframe given
arg: group--> list of columns to be group by
arg: value--> column where stats are need to be calcualted
func: grouping the df by the columns mentioned
return: dataframe confined by the parameters passed by
"""
output=df.groupby(group)[value].agg(['count', 'mean', 'median','sum']).reset_index()
print("created a dataframe with columns grouped by: {}\n values are aggregated by: {}".format(group,value))
return output
sa_year_qrt=summary_table(df,list(('Service Area', 'year', 'Quarter')),'JV Value')
sa_year=summary_table(df,list(('Service Area', 'year')),'JV Value')
created a dataframe with columns grouped by: ['Service Area', 'year', 'Quarter'] values are aggregated by: JV Value created a dataframe with columns grouped by: ['Service Area', 'year'] values are aggregated by: JV Value
ad_year_qt=summary_table(df,list(('Account Description', 'year', 'Quarter')),'JV Value')
ad_year=summary_table(df,list(('Account Description', 'year')),'JV Value')
created a dataframe with columns grouped by: ['Account Description', 'year', 'Quarter'] values are aggregated by: JV Value created a dataframe with columns grouped by: ['Account Description', 'year'] values are aggregated by: JV Value
#summary view of the transactions for each Service Area
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
#displaying the created dataframe through groupby function for the second part of the question
sa_year_qrt[:10].T
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | |
|---|---|---|---|---|---|---|---|---|---|---|
| Service Area | Adults and Communities | Adults and Communities | Adults and Communities | Adults and Communities | Adults and Communities | Adults and Communities | Adults and Communities | Adults and Communities | Adults and Communities | Adults and Communities |
| year | 2014.0 | 2014.0 | 2014.0 | 2014.0 | 2015.0 | 2015.0 | 2015.0 | 2015.0 | 2016.0 | 2016.0 |
| Quarter | Q1 | Q2 | Q3 | Q4 | Q1 | Q2 | Q3 | Q4 | Q1 | Q2 |
| count | 2 | 15 | 11 | 14 | 7 | 10 | 19 | 17 | 23 | 38 |
| mean | 225.5 | 4793.4 | 228.545455 | 3502.0 | 1044.285714 | 14753.0 | 3990.526316 | 1470.0 | 8437.130435 | 19754.342105 |
| median | 225.5 | 207.0 | 124.0 | 198.0 | 268.0 | 205.0 | 164.0 | 1667.0 | 3247.0 | 2673.5 |
| sum | 451.0 | 71901.0 | 2514.0 | 49028.0 | 7310.0 | 147530.0 | 75820.0 | 24990.0 | 194054.0 | 750665.0 |
ad_year_qt[:10].T
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | |
|---|---|---|---|---|---|---|---|---|---|---|
| Account Description | Advertising | Advertising | Advertising | Advertising | Advertising | Advertising | Advertising | Advertising for staff | Books-CDs-Audio-Video | Books-CDs-Audio-Video |
| year | 2014.0 | 2014.0 | 2014.0 | 2015.0 | 2015.0 | 2015.0 | 2016.0 | 2016.0 | 2014.0 | 2014.0 |
| Quarter | Q2 | Q3 | Q4 | Q1 | Q2 | Q4 | Q4 | Q2 | Q1 | Q2 |
| count | 8 | 3 | 5 | 2 | 2 | 2 | 4 | 1 | 25 | 50 |
| mean | 4604.0 | 5825.0 | 4659.4 | 5825.0 | 2927.0 | 115110.0 | 47692.5 | 45000.0 | 1547.36 | 5371.68 |
| median | 5216.0 | 5825.0 | 5825.0 | 5825.0 | 2927.0 | 115110.0 | 60000.0 | 45000.0 | 411.0 | 1513.5 |
| sum | 36832.0 | 17475.0 | 23297.0 | 11650.0 | 5854.0 | 230220.0 | 190770.0 | 45000.0 | 38684.0 | 268584.0 |
#experimenting with the tables to create subset of the dataframe for plotting
sa_year_qrt[sa_year_qrt['Service Area']=="Adults and Communities"][['year','Quarter','count']].T
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| year | 2014.0 | 2014.0 | 2014.0 | 2014.0 | 2015.0 | 2015.0 | 2015.0 | 2015.0 | 2016.0 | 2016.0 | 2016.0 | 2016.0 | 2017.0 | 2017.0 | 2017.0 | 2017.0 |
| Quarter | Q1 | Q2 | Q3 | Q4 | Q1 | Q2 | Q3 | Q4 | Q1 | Q2 | Q3 | Q4 | Q1 | Q2 | Q3 | Q4 |
| count | 2 | 15 | 11 | 14 | 7 | 10 | 19 | 17 | 23 | 38 | 33 | 36 | 37 | 6 | 7 | 3 |
sa_year_qrt[sa_year_qrt['Service Area']=="Adults and Communities"][['year','Quarter','sum']].T
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| year | 2014.0 | 2014.0 | 2014.0 | 2014.0 | 2015.0 | 2015.0 | 2015.0 | 2015.0 | 2016.0 | 2016.0 | 2016.0 | 2016.0 | 2017.0 | 2017.0 | 2017.0 | 2017.0 |
| Quarter | Q1 | Q2 | Q3 | Q4 | Q1 | Q2 | Q3 | Q4 | Q1 | Q2 | Q3 | Q4 | Q1 | Q2 | Q3 | Q4 |
| sum | 451.0 | 71901.0 | 2514.0 | 49028.0 | 7310.0 | 147530.0 | 75820.0 | 24990.0 | 194054.0 | 750665.0 | 480493.0 | 667573.0 | 500621.0 | 15934.0 | 48387.0 | 359511.0 |
def plot_q1(data,col,val):
"""
arg: data --> dataframe passed to get the plot
arg: col --> column for which individual plots are need to be created
arg: val --> what kind of observation needed (mean,sum,count)
fun: plot bar chart
return None
"""
count=0
col_unique_values=data[col].unique()
for i in col_unique_values:
plot_df=data[data[col]==i][['year','Quarter',val]]
count+=1
plt.figure(figsize=(14,6))
sns.barplot(data=plot_df,x='year',y=val,hue='Quarter')
plt.title("{} of Transcation for {}".format(val,col))
plt.xlabel("YEAR with unstacked Quarters")
plt.ylabel(val)
plt.tight_layout()
plt.show()
def plot_q1a(data,col,val):
"""
arg: data --> dataframe passed to get the plot
arg: col --> column for summary creation
arg: val --> column to apply sum
fun: plot summary bar chart
return None
"""
summary_df = data.groupby([col, 'year', 'Quarter'])[val].sum().reset_index()
plt.figure(figsize=(12, 8))
a=sns.barplot(data=summary_df, x='Quarter', y=val, hue='year',errorbar=None)
plt.title('Summary Plot: {}'.format(col))
plt.xlabel('Quarters')
plt.ylabel("summation of {}".format(val))
a.yaxis.set_major_formatter(FuncFormatter(lambda x, _: format(int(x), ',')))
plt.tight_layout()
plt.show()
plot_q1(sa_year_qrt,'Service Area','sum') #passing service area column for stats
plot_q1(ad_year_qt,'Account Description','sum') #passing account column for stats
plot_q1a(df,'Service Area','JV Value')
plot_q1a(df,'Account Description','JV Value')
+++++++++++++++++++++++++++++++++++++++++++++++++
#created few more tables for easy plotting
df_filtered = df.dropna(subset=['QT_validation'])
df_filtered['Quarter'] = df_filtered['Transaction Date'].dt.to_period('Q').astype(str) #the formart was creating issue so converted
## custom list of year-quarter
quarter_order = ['2014Q1', '2014Q2', '2014Q3', '2014Q4','2015Q1', '2015Q2', '2015Q3', '2015Q4','2016Q1', '2016Q2', '2016Q3', '2016Q4','2017Q1', '2017Q2', '2017Q3', '2017Q4']
df_filtered['Quarter'] = pd.Categorical(df_filtered['Quarter'], categories=quarter_order, ordered=True) #to control the order issue araised
#plotting the transaction amount
def spike_plot(data,x,y,col_temp=None):
"""
arg:data--> dataframe
arg:xaxis value
arg:yaxis value
col_temp: col parameter for relplot
"""
if col_temp:
g = sns.relplot(data=data, x=x, y=y, kind='line', col=col_temp,
col_wrap=2, aspect=2, height=3, linewidth=2)
g.set_titles('{col_name}')
g.set_axis_labels('Quarter', 'Transaction Amount')
# Adjust x-ticks for all subplots
for ax in g.axes.flatten():
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
plt.tight_layout()
plt.show()
else:
sns.relplot(data=data, x=x, y=y, kind='line',height=6, linewidth=2,aspect=3,ci=None)
plt.title('Transaction Amount')
plt.xlabel('Quarter-Year')
plt.ylabel('Transaction Amount')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
spike_plot(df,'Transaction Date','JV Value')
spike_plot(df_filtered,'Quarter','JV Value',col_temp='Service Area')
spike_plot(df_filtered,'year','JV Value')
spike_plot(df_filtered,'Quarter','JV Value')
spike_plot(df_filtered,'Quarter','JV Value',col_temp='Account Description')
#observation through z-score and percentage differnce in the transaction amount
sp_const=5
per_const=2
df['zscore'] = df.groupby(['Service Area', 'Account Description', 'QT_validation'])['JV Value'].transform(lambda x: (x - x.mean()) / x.std())
#artbitary value of 5 as treshold
z_score_spike=df[df['zscore']>sp_const]
z_score_non_spike=df[df['zscore']<per_const]
#using percentage differnce bwt consecutive values
#first date is ordered in ascending
df_sorted = df.groupby(['Service Area', 'Account Description']).apply(lambda x: x.sort_values('Transaction Date')).reset_index(drop=True)
df_sorted['%change'] = df_sorted.groupby(['Service Area', 'Account Description'])['JV Value'].pct_change()
percent_spike=df_sorted[df_sorted['%change']>sp_const]
percent_non_spike=df_sorted[df_sorted['%change']<per_const]
z_score_spike[z_score_spike["Service Area"]=="Childrens Services"][['JV Value','Service Area','zscore']].head()
| JV Value | Service Area | zscore | |
|---|---|---|---|
| 322 | 32943.0 | Childrens Services | 5.207716 |
| 494 | 68664.0 | Childrens Services | 5.493995 |
| 500 | 196000.0 | Childrens Services | 6.986634 |
| 673 | 12999.0 | Childrens Services | 7.431291 |
| 758 | 45672.0 | Childrens Services | 6.695405 |
percent_spike[percent_spike["Service Area"]=="Childrens Services"][['JV Value','Service Area','%change']].head()
| JV Value | Service Area | %change | |
|---|---|---|---|
| 9278 | 5997.0 | Childrens Services | 855.714286 |
| 9280 | 2274.0 | Childrens Services | 5.497143 |
| 9285 | 433.0 | Childrens Services | 47.111111 |
| 9286 | 6944.0 | Childrens Services | 15.036952 |
| 9290 | 29789.0 | Childrens Services | 11.991278 |
percent_non_spike[percent_non_spike["Service Area"]=="Childrens Services"][['JV Value','Service Area','%change']].head()
| JV Value | Service Area | %change | |
|---|---|---|---|
| 9275 | 222.0 | Childrens Services | -0.555110 |
| 9277 | 7.0 | Childrens Services | -0.989781 |
| 9279 | 350.0 | Childrens Services | -0.941637 |
| 9281 | 249.0 | Childrens Services | -0.890501 |
| 9282 | 252.0 | Childrens Services | 0.012048 |
z_score_non_spike[z_score_non_spike["Service Area"]=="Childrens Services"][['JV Value','Service Area','zscore']].head()
| JV Value | Service Area | zscore | |
|---|---|---|---|
| 1 | 600000.0 | Childrens Services | 1.788670 |
| 2 | 30938.0 | Childrens Services | 0.707107 |
| 3 | 21876.0 | Childrens Services | -0.707107 |
| 4 | 306.0 | Childrens Services | -0.356084 |
| 5 | 732.0 | Childrens Services | -0.471575 |
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#creating pipeline for clustering the service area by transaction count and total
def similarity(data,col,val,k):
"""
arg:data--> dataframe
arg:col--> column which has to be clustered
arg:val--> to be clustered of
arg:k--> cluster number for kmeans
function: to create a clustering model
returns dataframe with culster number
"""
temp=data.groupby(col)[val].agg(['count', 'sum', 'mean', 'median', 'std']).reset_index()
temp=temp.dropna()
# temp = temp.replace([np.inf, -np.inf], np.nan).dropna()
std= StandardScaler()
scaled=std.fit_transform(temp.drop(col, axis=1))
kmeans = KMeans(n_clusters=k)
kmeans.fit(scaled)
temp['cluster_id']=kmeans.labels_
plt.figure(figsize=(14,7))
sns.scatterplot(data=temp,x='sum',y='mean',hue='cluster_id',palette='viridis',s=100)
plt.legend()
plt.title("similarity plot")
plt.grid(True)
plt.show()
# plt.figure(figsize=(10, 6))
# sns.scatterplot(x='sum', y='sum', hue='cluster_id', data=a, palette='viridis', s=100)
# plt.title('Service Areas Grouped by Spending Behavior')
# plt.xlabel('Average Transaction Amount')
# plt.ylabel('Total Transaction Amount')
# plt.legend(title='Cluster')
# plt.grid(True)
# plt.show()
return temp,scaled
def hac(data,scale,link,col):
"""
arg: data--> dataframe
arg: scaled--> scaled numeric value
arg: linkage method
arg:leaves of dendogram by which column
functon: creating HAC plot
return: None
"""
cal_temp=linkage(scale,method=link)
plt.figure(figsize=(14,7))
dendrogram(cal_temp,labels=data[col].values,leaf_rotation=90)
plt.title('HAC')
plt.xlabel(col)
plt.show()
clus_analysis,scaled=similarity(df,'Service Area','JV Value',3)
clus_analysis[:10].T
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | |
|---|---|---|---|---|---|---|---|---|---|---|
| Service Area | Adults and Communities | Assurance | CSG Managed Budget | Children's Education & Skills | Children's Family Services | Children's Service DSG | Childrens Services | Commercial | Commissioning | Control Accounts |
| count | 278 | 344 | 36 | 667 | 7672 | 277 | 1297 | 9 | 400 | 8 |
| sum | 3396782.0 | 1829415.0 | 4290803.0 | 4128956.0 | 44773136.0 | 1757715.0 | 6194487.0 | 207786.0 | 5934935.0 | 19071.0 |
| mean | 12218.640288 | 5318.06686 | 119188.972222 | 6190.338831 | 5835.914494 | 6345.541516 | 4776.011565 | 23087.333333 | 14837.3375 | 2383.875 |
| median | 1831.0 | 900.0 | 4564.0 | 1385.0 | 1797.5 | 1894.0 | 675.0 | 594.0 | 4612.0 | 813.5 |
| std | 36217.416042 | 27137.891992 | 213468.294515 | 12512.61528 | 12028.966756 | 13725.713997 | 20421.416626 | 37508.583144 | 37451.003617 | 3146.298616 |
| cluster_id | 1 | 1 | 0 | 1 | 2 | 1 | 1 | 1 | 1 | 1 |
clus_analysis[clus_analysis['cluster_id']==0]
| Service Area | count | sum | mean | median | std | cluster_id | |
|---|---|---|---|---|---|---|---|
| 2 | CSG Managed Budget | 36 | 4290803.0 | 119188.972222 | 4564.0 | 213468.294515 | 0 |
| 10 | Customer Support Group | 117 | 16204913.0 | 138503.529915 | 10399.0 | 267847.540772 | 0 |
| 14 | Governance | 8 | 1376694.0 | 172086.750000 | 24943.5 | 288737.362988 | 0 |
hac(clus_analysis,scaled,'ward','Service Area')
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
def repeating_values(data,check_col,on_col,repeat):
"""
arg:data-->dataframe
arg:check_col-->column to be looked for misclassifications
arg:on_col-->against which column
arg:repeat--> repetation allowed
function: calculating the frequency of the column values
return: dataframe
"""
grp=data.groupby([check_col,on_col]).size().reset_index(name="count")
frq=grp.groupby(check_col)[on_col].count()
repeating_cond = frq[frq > 1]
top_inst=repeating_cond.sort_values(ascending=False)[:25]
plt.figure(figsize=(14,7))
plt.bar(top_inst.index,top_inst.values)
plt.xlabel('creditor name')
plt.ylabel('number of accounts present in')
plt.xticks(rotation=90)
plt.tight_layout()
plt.show()
return top_inst,repeating_cond
top_list,whole_list=repeating_values(df,'Creditor','Account Description',2)
top_list.head()
Creditor AMAZON UK MARKETPLACE 25 AMAZON EU 18 AMAZON UK RETAIL AMAZO 15 AMAZON SVCS EUROPE,SAR 14 ASDA SUPERSTORE 13 Name: Account Description, dtype: int64
whole_list.shape[0] #total of 474 creditors have been mapped into 3 or more accounts
474
+++++++++++++++++++++++++++++++++++++++++++++++++++++++
##anomaly detection
#creating dataframe based on weekly transaction sum
def anamoly(data,date_col,grp_col,val_col):
"""
arg:data--> dataframe to be checked for anomaly
arg:date_col--> considering anomaly has strong correlation with time ; passing the required data
arg:grp_col-->which column is checked for the anomaly
arg:val_col--> which column is used to check the anomaly
func: basic plots of the anomaly in the transactions weekly
return: supported dataframe
"""
data[date_col] = pd.to_datetime(data[date_col])
data[val_col]=pd.to_numeric(df2[val_col],errors='coerce')
temp_grp=data.groupby([grp_col,data[date_col].dt.week])
weekly_values=temp_grp[val_col].sum().reset_index() #
treshold=2 * weekly_values.groupby(grp_col)[val_col].transform('std') #
week_anomaly=weekly_values.copy()
anomaly=week_anomaly[week_anomaly[val_col] > treshold]
week_anomaly['Anomaly']=week_anomaly[val_col]>treshold#
# week_anomaly['Anomaly']=week_anomaly[val_col]>threshold#
for service_area,grp in week_anomaly.groupby(grp_col):
an_temp=grp[val_col]>treshold[grp.index]
plt.figure(figsize=(14,7))
plt.plot(grp[date_col], grp[val_col], label='Normal Transactions', color='blue')
plt.scatter(grp.loc[an_temp,date_col],grp.loc[an_temp,val_col],color='red',label='Anomalies')
plt.title(service_area)
plt.legend()
plt.grid(True)
plt.show()
return weekly_values,week_anomaly,anomaly,treshold
weekly_trans_sum,anomaly_encoded_df,only_anomaly,treshold=anamoly(df,'Transaction Date','Service Area','JV Value')
weekly_trans_sum
| Service Area | Transaction Date | JV Value | |
|---|---|---|---|
| 0 | Adults and Communities | 1.0 | 0.00 |
| 1 | Adults and Communities | 2.0 | 36.85 |
| 2 | Adults and Communities | 3.0 | 42.50 |
| 3 | Adults and Communities | 4.0 | 0.00 |
| 4 | Adults and Communities | 5.0 | 13.85 |
| 5 | Adults and Communities | 6.0 | 41.93 |
| 6 | Adults and Communities | 7.0 | 0.00 |
| 7 | Adults and Communities | 8.0 | 0.00 |
| 8 | Adults and Communities | 9.0 | 0.00 |
| 9 | Adults and Communities | 10.0 | 0.00 |
| 10 | Adults and Communities | 11.0 | 0.00 |
| 11 | Adults and Communities | 12.0 | 0.00 |
| 12 | Adults and Communities | 13.0 | 0.00 |
| 13 | Adults and Communities | 14.0 | 0.00 |
| 14 | Adults and Communities | 15.0 | 5.35 |
| 15 | Adults and Communities | 16.0 | 28.66 |
| 16 | Adults and Communities | 17.0 | 0.00 |
| 17 | Adults and Communities | 18.0 | 61.04 |
| 18 | Adults and Communities | 19.0 | 67.83 |
| 19 | Adults and Communities | 20.0 | 8.03 |
| 20 | Adults and Communities | 21.0 | 0.00 |
| 21 | Adults and Communities | 22.0 | 64.38 |
| 22 | Adults and Communities | 23.0 | 0.00 |
| 23 | Adults and Communities | 24.0 | 20.00 |
| 24 | Adults and Communities | 25.0 | 0.05 |
| 25 | Adults and Communities | 26.0 | -3.02 |
| 26 | Adults and Communities | 27.0 | 242.46 |
| 27 | Adults and Communities | 28.0 | 0.00 |
| 28 | Adults and Communities | 29.0 | 0.00 |
| 29 | Adults and Communities | 30.0 | 0.00 |
| 30 | Adults and Communities | 31.0 | 109.40 |
| 31 | Adults and Communities | 32.0 | 20.00 |
| 32 | Adults and Communities | 33.0 | 74.75 |
| 33 | Adults and Communities | 34.0 | 431.05 |
| 34 | Adults and Communities | 35.0 | 0.00 |
| 35 | Adults and Communities | 36.0 | 113.05 |
| 36 | Adults and Communities | 37.0 | 27.50 |
| 37 | Adults and Communities | 38.0 | 16.67 |
| 38 | Adults and Communities | 39.0 | 0.00 |
| 39 | Adults and Communities | 40.0 | 0.00 |
| 40 | Adults and Communities | 41.0 | 1079.50 |
| 41 | Adults and Communities | 42.0 | 20.00 |
| 42 | Adults and Communities | 43.0 | 20.00 |
| 43 | Adults and Communities | 44.0 | 32.05 |
| 44 | Adults and Communities | 45.0 | 26.12 |
| 45 | Adults and Communities | 46.0 | 125.01 |
| 46 | Adults and Communities | 47.0 | 0.00 |
| 47 | Adults and Communities | 48.0 | 103.90 |
| 48 | Adults and Communities | 49.0 | 56.50 |
| 49 | Adults and Communities | 50.0 | 0.00 |
| 50 | Adults and Communities | 51.0 | 5.00 |
| 51 | Adults and Communities | 52.0 | 18.00 |
| 52 | Adults and Communities | 53.0 | 0.00 |
| 53 | Assurance | 1.0 | 0.00 |
| 54 | Assurance | 2.0 | 0.00 |
| 55 | Assurance | 3.0 | 0.00 |
| 56 | Assurance | 4.0 | 13.85 |
| 57 | Assurance | 5.0 | 39.85 |
| 58 | Assurance | 6.0 | 0.00 |
| 59 | Assurance | 7.0 | 0.00 |
| 60 | Assurance | 8.0 | 0.00 |
| 61 | Assurance | 9.0 | 0.00 |
| 62 | Assurance | 10.0 | 0.00 |
| 63 | Assurance | 11.0 | 0.00 |
| 64 | Assurance | 12.0 | 0.00 |
| 65 | Assurance | 13.0 | 0.00 |
| 66 | Assurance | 14.0 | 0.00 |
| 67 | Assurance | 15.0 | 0.00 |
| 68 | Assurance | 16.0 | 0.00 |
| 69 | Assurance | 17.0 | 0.00 |
| 70 | Assurance | 18.0 | 0.00 |
| 71 | Assurance | 19.0 | 0.00 |
| 72 | Assurance | 20.0 | 0.00 |
| 73 | Assurance | 21.0 | 0.00 |
| 74 | Assurance | 22.0 | 0.00 |
| 75 | Assurance | 23.0 | 0.00 |
| 76 | Assurance | 24.0 | 7.30 |
| 77 | Assurance | 25.0 | 0.00 |
| 78 | Assurance | 26.0 | 0.00 |
| 79 | Assurance | 27.0 | 0.00 |
| 80 | Assurance | 28.0 | 0.00 |
| 81 | Assurance | 29.0 | 0.00 |
| 82 | Assurance | 30.0 | 0.00 |
| 83 | Assurance | 31.0 | 0.00 |
| 84 | Assurance | 32.0 | 0.00 |
| 85 | Assurance | 33.0 | 0.00 |
| 86 | Assurance | 34.0 | 0.00 |
| 87 | Assurance | 35.0 | 0.00 |
| 88 | Assurance | 36.0 | 0.00 |
| 89 | Assurance | 37.0 | 0.00 |
| 90 | Assurance | 38.0 | 0.00 |
| 91 | Assurance | 39.0 | 0.00 |
| 92 | Assurance | 40.0 | 0.00 |
| 93 | Assurance | 41.0 | 0.00 |
| 94 | Assurance | 42.0 | 0.00 |
| 95 | Assurance | 43.0 | 0.00 |
| 96 | Assurance | 44.0 | 0.00 |
| 97 | Assurance | 45.0 | 0.00 |
| 98 | Assurance | 46.0 | 83.25 |
| 99 | Assurance | 47.0 | -1.45 |
| 100 | Assurance | 48.0 | 0.00 |
| 101 | Assurance | 49.0 | 0.00 |
| 102 | Assurance | 50.0 | 0.00 |
| 103 | Assurance | 51.0 | 0.00 |
| 104 | Assurance | 52.0 | 0.00 |
| 105 | Assurance | 53.0 | 0.00 |
| 106 | CSG Managed Budget | 2.0 | 182.08 |
| 107 | CSG Managed Budget | 6.0 | 418.46 |
| 108 | CSG Managed Budget | 19.0 | 30.50 |
| 109 | CSG Managed Budget | 20.0 | 13.40 |
| 110 | CSG Managed Budget | 21.0 | 147.18 |
| 111 | CSG Managed Budget | 22.0 | 84.08 |
| 112 | CSG Managed Budget | 25.0 | 3.47 |
| 113 | CSG Managed Budget | 28.0 | 106.87 |
| 114 | CSG Managed Budget | 30.0 | 150.54 |
| 115 | CSG Managed Budget | 31.0 | 96.20 |
| 116 | CSG Managed Budget | 34.0 | 56.05 |
| 117 | CSG Managed Budget | 36.0 | 12.52 |
| 118 | CSG Managed Budget | 39.0 | 20.00 |
| 119 | CSG Managed Budget | 42.0 | 50.00 |
| 120 | CSG Managed Budget | 43.0 | 78.90 |
| 121 | CSG Managed Budget | 49.0 | 157.06 |
| 122 | Children's Education & Skills | 2.0 | 0.00 |
| 123 | Children's Education & Skills | 3.0 | 981.49 |
| 124 | Children's Education & Skills | 4.0 | 1238.16 |
| 125 | Children's Education & Skills | 5.0 | 549.83 |
| 126 | Children's Education & Skills | 6.0 | 0.00 |
| 127 | Children's Education & Skills | 7.0 | 237.74 |
| 128 | Children's Education & Skills | 8.0 | 4.50 |
| 129 | Children's Education & Skills | 9.0 | 0.00 |
| 130 | Children's Education & Skills | 10.0 | 0.00 |
| 131 | Children's Education & Skills | 11.0 | 33.00 |
| 132 | Children's Education & Skills | 14.0 | 0.00 |
| 133 | Children's Education & Skills | 15.0 | 852.28 |
| 134 | Children's Education & Skills | 16.0 | 0.00 |
| 135 | Children's Education & Skills | 18.0 | 227.50 |
| 136 | Children's Education & Skills | 19.0 | 5.00 |
| 137 | Children's Education & Skills | 20.0 | 160.71 |
| 138 | Children's Education & Skills | 21.0 | 0.00 |
| 139 | Children's Education & Skills | 22.0 | 0.00 |
| 140 | Children's Education & Skills | 23.0 | 299.64 |
| 141 | Children's Education & Skills | 24.0 | 77.47 |
| 142 | Children's Education & Skills | 25.0 | 0.00 |
| 143 | Children's Education & Skills | 26.0 | 0.00 |
| 144 | Children's Education & Skills | 27.0 | 558.32 |
| 145 | Children's Education & Skills | 28.0 | 69.50 |
| 146 | Children's Education & Skills | 29.0 | 0.00 |
| 147 | Children's Education & Skills | 30.0 | 0.00 |
| 148 | Children's Education & Skills | 31.0 | 85.98 |
| 149 | Children's Education & Skills | 32.0 | 0.00 |
| 150 | Children's Education & Skills | 33.0 | 100.50 |
| 151 | Children's Education & Skills | 34.0 | 0.00 |
| 152 | Children's Education & Skills | 35.0 | 0.00 |
| 153 | Children's Education & Skills | 36.0 | 173.94 |
| 154 | Children's Education & Skills | 37.0 | 74.13 |
| 155 | Children's Education & Skills | 38.0 | 0.00 |
| 156 | Children's Education & Skills | 39.0 | 0.00 |
| 157 | Children's Education & Skills | 40.0 | 0.00 |
| 158 | Children's Education & Skills | 41.0 | 503.48 |
| 159 | Children's Education & Skills | 42.0 | 0.00 |
| 160 | Children's Education & Skills | 43.0 | 0.00 |
| 161 | Children's Education & Skills | 44.0 | 0.00 |
| 162 | Children's Education & Skills | 45.0 | 350.00 |
| 163 | Children's Education & Skills | 46.0 | 446.98 |
| 164 | Children's Education & Skills | 47.0 | 485.47 |
| 165 | Children's Education & Skills | 48.0 | 1121.65 |
| 166 | Children's Education & Skills | 49.0 | 629.66 |
| 167 | Children's Education & Skills | 50.0 | 202.41 |
| 168 | Children's Education & Skills | 51.0 | 1441.11 |
| 169 | Children's Education & Skills | 52.0 | 6.98 |
| 170 | Children's Education & Skills | 53.0 | 0.00 |
| 171 | Children's Family Services | 1.0 | 957.04 |
| 172 | Children's Family Services | 2.0 | 1601.09 |
| 173 | Children's Family Services | 3.0 | 9094.78 |
| 174 | Children's Family Services | 4.0 | 6305.44 |
| 175 | Children's Family Services | 5.0 | 10119.26 |
| 176 | Children's Family Services | 6.0 | 3211.83 |
| 177 | Children's Family Services | 7.0 | 1943.75 |
| 178 | Children's Family Services | 8.0 | 2735.92 |
| 179 | Children's Family Services | 9.0 | 3095.46 |
| 180 | Children's Family Services | 10.0 | 320.75 |
| 181 | Children's Family Services | 11.0 | 95.83 |
| 182 | Children's Family Services | 12.0 | 0.00 |
| 183 | Children's Family Services | 13.0 | 0.00 |
| 184 | Children's Family Services | 14.0 | 1261.34 |
| 185 | Children's Family Services | 15.0 | 1944.05 |
| 186 | Children's Family Services | 16.0 | 0.00 |
| 187 | Children's Family Services | 17.0 | 0.00 |
| 188 | Children's Family Services | 18.0 | 2040.11 |
| 189 | Children's Family Services | 19.0 | 50.89 |
| 190 | Children's Family Services | 20.0 | 660.31 |
| 191 | Children's Family Services | 21.0 | 0.00 |
| 192 | Children's Family Services | 22.0 | 0.00 |
| 193 | Children's Family Services | 23.0 | 2530.23 |
| 194 | Children's Family Services | 24.0 | 810.14 |
| 195 | Children's Family Services | 25.0 | 0.00 |
| 196 | Children's Family Services | 26.0 | 0.00 |
| 197 | Children's Family Services | 27.0 | 1077.74 |
| 198 | Children's Family Services | 28.0 | 673.90 |
| 199 | Children's Family Services | 29.0 | 0.00 |
| 200 | Children's Family Services | 30.0 | 0.00 |
| 201 | Children's Family Services | 31.0 | 857.39 |
| 202 | Children's Family Services | 32.0 | 0.00 |
| 203 | Children's Family Services | 33.0 | 986.25 |
| 204 | Children's Family Services | 34.0 | 0.00 |
| 205 | Children's Family Services | 35.0 | 0.00 |
| 206 | Children's Family Services | 36.0 | 2701.74 |
| 207 | Children's Family Services | 37.0 | 619.87 |
| 208 | Children's Family Services | 38.0 | 0.00 |
| 209 | Children's Family Services | 39.0 | 0.00 |
| 210 | Children's Family Services | 40.0 | 314.57 |
| 211 | Children's Family Services | 41.0 | 2248.31 |
| 212 | Children's Family Services | 42.0 | 0.00 |
| 213 | Children's Family Services | 43.0 | 0.00 |
| 214 | Children's Family Services | 44.0 | 0.00 |
| 215 | Children's Family Services | 45.0 | 114.17 |
| 216 | Children's Family Services | 46.0 | 1897.92 |
| 217 | Children's Family Services | 47.0 | 7360.60 |
| 218 | Children's Family Services | 48.0 | 5911.19 |
| 219 | Children's Family Services | 49.0 | 2234.96 |
| 220 | Children's Family Services | 50.0 | 1154.23 |
| 221 | Children's Family Services | 51.0 | 4995.83 |
| 222 | Children's Family Services | 52.0 | 2433.27 |
| 223 | Children's Family Services | 53.0 | 0.00 |
| 224 | Children's Service DSG | 2.0 | 128.90 |
| 225 | Children's Service DSG | 3.0 | 0.00 |
| 226 | Children's Service DSG | 4.0 | 0.00 |
| 227 | Children's Service DSG | 5.0 | 13.85 |
| 228 | Children's Service DSG | 6.0 | 739.75 |
| 229 | Children's Service DSG | 7.0 | 124.71 |
| 230 | Children's Service DSG | 8.0 | 0.00 |
| 231 | Children's Service DSG | 9.0 | 0.00 |
| 232 | Children's Service DSG | 10.0 | 0.00 |
| 233 | Children's Service DSG | 11.0 | 180.45 |
| 234 | Children's Service DSG | 12.0 | 0.00 |
| 235 | Children's Service DSG | 13.0 | 0.00 |
| 236 | Children's Service DSG | 14.0 | 0.00 |
| 237 | Children's Service DSG | 15.0 | 20.00 |
| 238 | Children's Service DSG | 16.0 | 0.00 |
| 239 | Children's Service DSG | 17.0 | 0.00 |
| 240 | Children's Service DSG | 18.0 | 0.00 |
| 241 | Children's Service DSG | 19.0 | 303.77 |
| 242 | Children's Service DSG | 20.0 | 15.77 |
| 243 | Children's Service DSG | 21.0 | 0.00 |
| 244 | Children's Service DSG | 22.0 | 0.00 |
| 245 | Children's Service DSG | 23.0 | 64.48 |
| 246 | Children's Service DSG | 24.0 | 127.30 |
| 247 | Children's Service DSG | 25.0 | 0.00 |
| 248 | Children's Service DSG | 26.0 | 0.00 |
| 249 | Children's Service DSG | 27.0 | 0.00 |
| 250 | Children's Service DSG | 28.0 | 79.98 |
| 251 | Children's Service DSG | 29.0 | 0.00 |
| 252 | Children's Service DSG | 30.0 | 0.00 |
| 253 | Children's Service DSG | 31.0 | 41.98 |
| 254 | Children's Service DSG | 32.0 | 0.00 |
| 255 | Children's Service DSG | 33.0 | 578.61 |
| 256 | Children's Service DSG | 34.0 | 252.05 |
| 257 | Children's Service DSG | 35.0 | 33.00 |
| 258 | Children's Service DSG | 36.0 | 252.96 |
| 259 | Children's Service DSG | 37.0 | 298.16 |
| 260 | Children's Service DSG | 38.0 | 8.15 |
| 261 | Children's Service DSG | 39.0 | 365.93 |
| 262 | Children's Service DSG | 40.0 | 0.00 |
| 263 | Children's Service DSG | 42.0 | 194.32 |
| 264 | Children's Service DSG | 43.0 | 192.20 |
| 265 | Children's Service DSG | 44.0 | 40.00 |
| 266 | Children's Service DSG | 45.0 | 0.00 |
| 267 | Children's Service DSG | 46.0 | 101.28 |
| 268 | Children's Service DSG | 47.0 | 14.37 |
| 269 | Children's Service DSG | 48.0 | 572.87 |
| 270 | Children's Service DSG | 49.0 | 0.00 |
| 271 | Children's Service DSG | 50.0 | 0.00 |
| 272 | Children's Service DSG | 51.0 | 96.39 |
| 273 | Children's Service DSG | 53.0 | 0.00 |
| 274 | Childrens Services | 1.0 | 273.65 |
| 275 | Childrens Services | 2.0 | 2893.47 |
| 276 | Childrens Services | 6.0 | 4036.87 |
| 277 | Childrens Services | 7.0 | 20.00 |
| 278 | Childrens Services | 10.0 | 2199.33 |
| 279 | Childrens Services | 11.0 | 20.00 |
| 280 | Childrens Services | 14.0 | 1229.29 |
| 281 | Childrens Services | 15.0 | 1057.00 |
| 282 | Childrens Services | 16.0 | 1828.23 |
| 283 | Childrens Services | 17.0 | 2919.19 |
| 284 | Childrens Services | 18.0 | 4870.81 |
| 285 | Childrens Services | 19.0 | 351.56 |
| 286 | Childrens Services | 20.0 | 5040.35 |
| 287 | Childrens Services | 21.0 | 3705.73 |
| 288 | Childrens Services | 22.0 | 4909.91 |
| 289 | Childrens Services | 23.0 | 548.94 |
| 290 | Childrens Services | 24.0 | 403.48 |
| 291 | Childrens Services | 25.0 | 3229.64 |
| 292 | Childrens Services | 26.0 | 1890.47 |
| 293 | Childrens Services | 27.0 | 2099.33 |
| 294 | Childrens Services | 28.0 | 2417.57 |
| 295 | Childrens Services | 29.0 | 5472.17 |
| 296 | Childrens Services | 30.0 | 7962.97 |
| 297 | Childrens Services | 31.0 | 3169.25 |
| 298 | Childrens Services | 32.0 | 3104.53 |
| 299 | Childrens Services | 36.0 | 2191.71 |
| 300 | Childrens Services | 40.0 | 938.28 |
| 301 | Childrens Services | 41.0 | 1287.93 |
| 302 | Childrens Services | 42.0 | 289.17 |
| 303 | Childrens Services | 43.0 | 167.60 |
| 304 | Childrens Services | 44.0 | 52.00 |
| 305 | Childrens Services | 45.0 | 1865.51 |
| 306 | Childrens Services | 49.0 | 1233.81 |
| 307 | Commercial | 19.0 | 114.98 |
| 308 | Commercial | 32.0 | 12.00 |
| 309 | Commercial | 34.0 | 217.75 |
| 310 | Commercial | 35.0 | 374.86 |
| 311 | Commercial | 39.0 | 173.60 |
| 312 | Commissioning | 1.0 | 204.56 |
| 313 | Commissioning | 2.0 | 60.60 |
| 314 | Commissioning | 3.0 | 57.19 |
| 315 | Commissioning | 4.0 | 43.95 |
| 316 | Commissioning | 5.0 | 296.29 |
| 317 | Commissioning | 6.0 | 0.00 |
| 318 | Commissioning | 7.0 | 41.67 |
| 319 | Commissioning | 8.0 | 4.50 |
| 320 | Commissioning | 9.0 | 0.00 |
| 321 | Commissioning | 10.0 | 0.00 |
| 322 | Commissioning | 11.0 | 206.60 |
| 323 | Commissioning | 12.0 | 0.00 |
| 324 | Commissioning | 14.0 | 0.00 |
| 325 | Commissioning | 15.0 | 8.20 |
| 326 | Commissioning | 16.0 | 0.00 |
| 327 | Commissioning | 17.0 | 0.00 |
| 328 | Commissioning | 18.0 | 227.50 |
| 329 | Commissioning | 19.0 | 0.00 |
| 330 | Commissioning | 20.0 | 295.00 |
| 331 | Commissioning | 21.0 | 0.00 |
| 332 | Commissioning | 22.0 | 0.00 |
| 333 | Commissioning | 23.0 | 2.50 |
| 334 | Commissioning | 24.0 | 0.00 |
| 335 | Commissioning | 25.0 | 0.00 |
| 336 | Commissioning | 26.0 | 0.00 |
| 337 | Commissioning | 27.0 | 0.00 |
| 338 | Commissioning | 28.0 | 11.50 |
| 339 | Commissioning | 29.0 | 0.00 |
| 340 | Commissioning | 30.0 | 0.00 |
| 341 | Commissioning | 31.0 | 23.64 |
| 342 | Commissioning | 32.0 | 0.00 |
| 343 | Commissioning | 33.0 | 125.04 |
| 344 | Commissioning | 34.0 | 0.00 |
| 345 | Commissioning | 35.0 | 0.00 |
| 346 | Commissioning | 36.0 | 17.04 |
| 347 | Commissioning | 37.0 | 0.00 |
| 348 | Commissioning | 38.0 | 0.00 |
| 349 | Commissioning | 39.0 | 0.00 |
| 350 | Commissioning | 40.0 | 0.00 |
| 351 | Commissioning | 41.0 | 883.22 |
| 352 | Commissioning | 42.0 | 0.00 |
| 353 | Commissioning | 43.0 | 0.00 |
| 354 | Commissioning | 44.0 | 0.00 |
| 355 | Commissioning | 45.0 | 38.79 |
| 356 | Commissioning | 46.0 | 146.60 |
| 357 | Commissioning | 47.0 | 119.62 |
| 358 | Commissioning | 48.0 | 420.00 |
| 359 | Commissioning | 49.0 | 0.00 |
| 360 | Commissioning | 50.0 | 34.86 |
| 361 | Commissioning | 51.0 | 398.20 |
| 362 | Commissioning | 52.0 | 123.42 |
| 363 | Commissioning | 53.0 | 0.00 |
| 364 | Control Accounts | 10.0 | 653.84 |
| 365 | Control Accounts | 14.0 | 909.28 |
| 366 | Control Accounts | 24.0 | 12.50 |
| 367 | Control Accounts | 27.0 | 268.86 |
| 368 | Customer Support Group | 2.0 | 0.00 |
| 369 | Customer Support Group | 3.0 | 17.04 |
| 370 | Customer Support Group | 4.0 | 1159.10 |
| 371 | Customer Support Group | 5.0 | 0.00 |
| 372 | Customer Support Group | 7.0 | 13.39 |
| 373 | Customer Support Group | 8.0 | 0.00 |
| 374 | Customer Support Group | 9.0 | 0.00 |
| 375 | Customer Support Group | 10.0 | 0.00 |
| 376 | Customer Support Group | 12.0 | 0.00 |
| 377 | Customer Support Group | 13.0 | 0.00 |
| 378 | Customer Support Group | 15.0 | 19.25 |
| 379 | Customer Support Group | 16.0 | 0.00 |
| 380 | Customer Support Group | 17.0 | 0.00 |
| 381 | Customer Support Group | 20.0 | 22.92 |
| 382 | Customer Support Group | 21.0 | 0.00 |
| 383 | Customer Support Group | 22.0 | 0.00 |
| 384 | Customer Support Group | 23.0 | 0.00 |
| 385 | Customer Support Group | 24.0 | 23.00 |
| 386 | Customer Support Group | 25.0 | 0.00 |
| 387 | Customer Support Group | 26.0 | 0.00 |
| 388 | Customer Support Group | 27.0 | 0.00 |
| 389 | Customer Support Group | 28.0 | 0.00 |
| 390 | Customer Support Group | 29.0 | 0.00 |
| 391 | Customer Support Group | 30.0 | 0.00 |
| 392 | Customer Support Group | 31.0 | 17.04 |
| 393 | Customer Support Group | 32.0 | 0.00 |
| 394 | Customer Support Group | 33.0 | 0.00 |
| 395 | Customer Support Group | 34.0 | 0.00 |
| 396 | Customer Support Group | 35.0 | 0.00 |
| 397 | Customer Support Group | 36.0 | 96.79 |
| 398 | Customer Support Group | 39.0 | 0.00 |
| 399 | Customer Support Group | 41.0 | 6.58 |
| 400 | Customer Support Group | 43.0 | 0.00 |
| 401 | Customer Support Group | 45.0 | 0.00 |
| 402 | Customer Support Group | 46.0 | 52.51 |
| 403 | Customer Support Group | 47.0 | 187.43 |
| 404 | Customer Support Group | 48.0 | 0.00 |
| 405 | Customer Support Group | 49.0 | 0.00 |
| 406 | Customer Support Group | 51.0 | 165.40 |
| 407 | Customer Support Group | 52.0 | 0.00 |
| 408 | Deputy Chief Operating Officer | 2.0 | 344.94 |
| 409 | Deputy Chief Operating Officer | 6.0 | 19.17 |
| 410 | Deputy Chief Operating Officer | 10.0 | 136.25 |
| 411 | Deputy Chief Operating Officer | 11.0 | 20.00 |
| 412 | Deputy Chief Operating Officer | 14.0 | 135.17 |
| 413 | Deputy Chief Operating Officer | 15.0 | 1279.90 |
| 414 | Deputy Chief Operating Officer | 17.0 | 77.13 |
| 415 | Deputy Chief Operating Officer | 18.0 | 8.32 |
| 416 | Deputy Chief Operating Officer | 19.0 | 149.87 |
| 417 | Deputy Chief Operating Officer | 20.0 | 29.98 |
| 418 | Deputy Chief Operating Officer | 21.0 | 128.21 |
| 419 | Deputy Chief Operating Officer | 22.0 | 957.20 |
| 420 | Deputy Chief Operating Officer | 24.0 | 20.00 |
| 421 | Deputy Chief Operating Officer | 25.0 | 125.06 |
| 422 | Deputy Chief Operating Officer | 27.0 | 61.55 |
| 423 | Deputy Chief Operating Officer | 29.0 | 2240.37 |
| 424 | Deputy Chief Operating Officer | 30.0 | 221.45 |
| 425 | Deputy Chief Operating Officer | 32.0 | 238.69 |
| 426 | Deputy Chief Operating Officer | 33.0 | -0.26 |
| 427 | Deputy Chief Operating Officer | 34.0 | 123.22 |
| 428 | Deputy Chief Operating Officer | 35.0 | 52.50 |
| 429 | Deputy Chief Operating Officer | 36.0 | 94.68 |
| 430 | Deputy Chief Operating Officer | 38.0 | 1073.91 |
| 431 | Deputy Chief Operating Officer | 39.0 | 534.63 |
| 432 | Deputy Chief Operating Officer | 40.0 | 319.94 |
| 433 | Deputy Chief Operating Officer | 41.0 | 154.60 |
| 434 | Deputy Chief Operating Officer | 42.0 | 354.67 |
| 435 | Deputy Chief Operating Officer | 43.0 | 238.61 |
| 436 | Deputy Chief Operating Officer | 44.0 | 20.00 |
| 437 | Deputy Chief Operating Officer | 45.0 | 114.90 |
| 438 | Deputy Chief Operating Officer | 49.0 | 39.37 |
| 439 | Deputy Chief Operating Officer | 50.0 | 180.00 |
| 440 | Education | 2.0 | 503.52 |
| 441 | Education | 6.0 | 349.76 |
| 442 | Education | 7.0 | 213.12 |
| 443 | Education | 10.0 | 419.78 |
| 444 | Education | 11.0 | 450.24 |
| 445 | Education | 24.0 | 84.00 |
| 446 | Education | 28.0 | 66.91 |
| 447 | Education | 32.0 | 1164.33 |
| 448 | Education | 33.0 | 510.10 |
| 449 | Education | 34.0 | 664.44 |
| 450 | Education | 35.0 | 260.07 |
| 451 | Education | 37.0 | 871.36 |
| 452 | Education | 38.0 | 252.00 |
| 453 | Education | 39.0 | 1531.51 |
| 454 | Education | 40.0 | 158.07 |
| 455 | Education | 41.0 | 50.51 |
| 456 | Education | 42.0 | 1080.05 |
| 457 | Education | 43.0 | 323.89 |
| 458 | Education | 44.0 | 845.36 |
| 459 | Education | 45.0 | 8.41 |
| 460 | Education | 50.0 | 153.00 |
| 461 | Family Services | 2.0 | 3053.45 |
| 462 | Family Services | 6.0 | 621.65 |
| 463 | Family Services | 7.0 | 973.66 |
| 464 | Family Services | 10.0 | -970.08 |
| 465 | Family Services | 11.0 | 1968.27 |
| 466 | Family Services | 15.0 | 2650.57 |
| 467 | Family Services | 19.0 | 1554.72 |
| 468 | Family Services | 23.0 | 533.99 |
| 469 | Family Services | 24.0 | 214.98 |
| 470 | Family Services | 28.0 | 1991.99 |
| 471 | Family Services | 32.0 | 5492.32 |
| 472 | Family Services | 33.0 | 4443.37 |
| 473 | Family Services | 34.0 | 7192.16 |
| 474 | Family Services | 35.0 | 3670.33 |
| 475 | Family Services | 37.0 | 3039.46 |
| 476 | Family Services | 38.0 | 3918.12 |
| 477 | Family Services | 39.0 | 6926.76 |
| 478 | Family Services | 40.0 | 2165.64 |
| 479 | Family Services | 41.0 | 973.05 |
| 480 | Family Services | 42.0 | 5405.30 |
| 481 | Family Services | 43.0 | 3497.24 |
| 482 | Family Services | 44.0 | 4693.72 |
| 483 | Family Services | 45.0 | 2515.27 |
| 484 | Family Services | 50.0 | 2363.03 |
| 485 | Governance | 2.0 | -7.50 |
| 486 | Governance | 17.0 | 71.74 |
| 487 | Governance | 21.0 | 88.99 |
| 488 | Governance | 26.0 | 26.98 |
| 489 | Governance | 39.0 | 20.00 |
| 490 | Governance | 43.0 | 20.00 |
| 491 | Governance | 50.0 | 20.00 |
| 492 | HRA | 47.0 | 0.00 |
| 493 | Internal Audit & CAFT | 2.0 | 6.55 |
| 494 | Internal Audit & CAFT | 15.0 | 90.45 |
| 495 | Internal Audit & CAFT | 25.0 | -0.40 |
| 496 | Internal Audit & CAFT | 29.0 | 109.46 |
| 497 | Internal Audit & CAFT | 30.0 | 45.00 |
| 498 | Internal Audit & CAFT | 38.0 | 40.00 |
| 499 | Internal Audit & CAFT | 39.0 | 294.00 |
| 500 | Internal Audit & CAFT | 40.0 | 82.84 |
| 501 | Internal Audit & CAFT | 44.0 | 3.50 |
| 502 | NSCSO | 16.0 | 0.93 |
| 503 | NSCSO | 29.0 | 130.00 |
| 504 | NSCSO | 30.0 | 4.50 |
| 505 | Parking & Infrastructure | 2.0 | 0.00 |
| 506 | Parking & Infrastructure | 7.0 | 0.00 |
| 507 | Parking & Infrastructure | 12.0 | 0.00 |
| 508 | Parking & Infrastructure | 23.0 | 875.42 |
| 509 | Parking & Infrastructure | 24.0 | 26.66 |
| 510 | Parking & Infrastructure | 30.0 | 0.00 |
| 511 | Parking & Infrastructure | 41.0 | 0.00 |
| 512 | Parking & Infrastructure | 48.0 | 142.84 |
| 513 | Parking & Infrastructure | 50.0 | 0.00 |
| 514 | Public Health | 10.0 | 0.19 |
| 515 | Public Health | 17.0 | -50.69 |
| 516 | Public Health | 25.0 | 0.00 |
| 517 | Regional Enterprise | 3.0 | 0.00 |
| 518 | Regional Enterprise | 4.0 | 13.85 |
| 519 | Regional Enterprise | 11.0 | 0.00 |
| 520 | Regional Enterprise | 45.0 | 0.00 |
| 521 | Regional Enterprise | 48.0 | 0.00 |
| 522 | Regional Enterprise | 51.0 | 0.00 |
| 523 | Strategic Commissioning Board | 26.0 | 3.60 |
| 524 | Street Scene | 1.0 | 89.00 |
| 525 | Street Scene | 2.0 | 54.00 |
| 526 | Street Scene | 11.0 | 20.00 |
| 527 | Street Scene | 16.0 | 8.48 |
| 528 | Street Scene | 17.0 | 12.68 |
| 529 | Street Scene | 23.0 | 105.43 |
| 530 | Street Scene | 24.0 | 127.84 |
| 531 | Street Scene | 25.0 | 117.35 |
| 532 | Street Scene | 27.0 | 6.45 |
| 533 | Street Scene | 28.0 | 15.00 |
| 534 | Street Scene | 29.0 | 158.74 |
| 535 | Street Scene | 31.0 | 121.69 |
| 536 | Street Scene | 33.0 | 19.54 |
| 537 | Street Scene | 34.0 | 19.17 |
| 538 | Street Scene | 35.0 | 127.76 |
| 539 | Street Scene | 36.0 | 15.00 |
| 540 | Street Scene | 38.0 | 86.92 |
| 541 | Street Scene | 39.0 | 1.30 |
| 542 | Street Scene | 42.0 | 41.00 |
| 543 | Street Scene | 43.0 | 29.00 |
| 544 | Street Scene | 44.0 | 483.96 |
| 545 | Street Scene | 45.0 | 44.35 |
| 546 | Street Scene | 50.0 | 1.30 |
| 547 | Streetscene | 1.0 | 0.00 |
| 548 | Streetscene | 2.0 | 166.86 |
| 549 | Streetscene | 3.0 | 165.99 |
| 550 | Streetscene | 4.0 | 110.76 |
| 551 | Streetscene | 5.0 | 17.04 |
| 552 | Streetscene | 6.0 | 227.50 |
| 553 | Streetscene | 7.0 | 0.00 |
| 554 | Streetscene | 8.0 | 494.11 |
| 555 | Streetscene | 9.0 | 57.87 |
| 556 | Streetscene | 10.0 | 0.00 |
| 557 | Streetscene | 11.0 | 0.00 |
| 558 | Streetscene | 12.0 | 0.00 |
| 559 | Streetscene | 13.0 | 0.00 |
| 560 | Streetscene | 14.0 | 0.00 |
| 561 | Streetscene | 15.0 | 30.00 |
| 562 | Streetscene | 16.0 | 0.00 |
| 563 | Streetscene | 18.0 | 0.00 |
| 564 | Streetscene | 20.0 | 0.00 |
| 565 | Streetscene | 21.0 | 0.00 |
| 566 | Streetscene | 22.0 | 0.00 |
| 567 | Streetscene | 23.0 | 23.96 |
| 568 | Streetscene | 24.0 | 14.50 |
| 569 | Streetscene | 25.0 | 0.00 |
| 570 | Streetscene | 26.0 | 0.00 |
| 571 | Streetscene | 27.0 | 0.00 |
| 572 | Streetscene | 28.0 | 144.23 |
| 573 | Streetscene | 29.0 | 0.00 |
| 574 | Streetscene | 30.0 | 0.00 |
| 575 | Streetscene | 32.0 | 0.00 |
| 576 | Streetscene | 33.0 | 0.00 |
| 577 | Streetscene | 34.0 | 0.00 |
| 578 | Streetscene | 35.0 | 0.00 |
| 579 | Streetscene | 36.0 | 220.70 |
| 580 | Streetscene | 37.0 | 0.00 |
| 581 | Streetscene | 38.0 | 0.00 |
| 582 | Streetscene | 39.0 | 0.00 |
| 583 | Streetscene | 40.0 | 30.79 |
| 584 | Streetscene | 41.0 | 0.00 |
| 585 | Streetscene | 42.0 | 0.00 |
| 586 | Streetscene | 43.0 | 0.00 |
| 587 | Streetscene | 44.0 | 0.00 |
| 588 | Streetscene | 45.0 | 0.00 |
| 589 | Streetscene | 46.0 | 107.25 |
| 590 | Streetscene | 47.0 | 164.80 |
| 591 | Streetscene | 48.0 | 909.47 |
| 592 | Streetscene | 49.0 | 51.72 |
| 593 | Streetscene | 50.0 | 9.05 |
| 594 | Streetscene | 51.0 | 609.08 |
| 595 | Streetscene | 52.0 | 0.00 |
anomaly_encoded_df[:10].T
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | |
|---|---|---|---|---|---|---|---|---|---|---|
| Service Area | Adults and Communities | Adults and Communities | Adults and Communities | Adults and Communities | Adults and Communities | Adults and Communities | Adults and Communities | Adults and Communities | Adults and Communities | Adults and Communities |
| Transaction Date | 1.0 | 2.0 | 3.0 | 4.0 | 5.0 | 6.0 | 7.0 | 8.0 | 9.0 | 10.0 |
| JV Value | 0.0 | 36.85 | 42.5 | 0.0 | 13.85 | 41.93 | 0.0 | 0.0 | 0.0 | 0.0 |
| Anomaly | False | False | False | False | False | False | False | False | False | False |
only_anomaly[:10].T
| 33 | 40 | 57 | 98 | 107 | 123 | 124 | 133 | 165 | 168 | |
|---|---|---|---|---|---|---|---|---|---|---|
| Service Area | Adults and Communities | Adults and Communities | Assurance | Assurance | CSG Managed Budget | Children's Education & Skills | Children's Education & Skills | Children's Education & Skills | Children's Education & Skills | Children's Education & Skills |
| Transaction Date | 34.0 | 41.0 | 5.0 | 46.0 | 6.0 | 3.0 | 4.0 | 15.0 | 48.0 | 51.0 |
| JV Value | 431.05 | 1079.5 | 39.85 | 83.25 | 418.46 | 981.49 | 1238.16 | 852.28 | 1121.65 | 1441.11 |
def anomaly_sct(data,date_col,grp_col,val_col,tres):
"""
arg:same to the old function
function : scatter plot on whole data anomaly or not
reutrn: None
"""
anom_temp=data[val_col]>tres.fillna(0)
# anom_temp=tres
plt.figure(figsize=(14,6))
g=sns.scatterplot(data=data,x=date_col,y=val_col,hue=grp_col, legend=False, label='normal',palette='tab20')
sns.scatterplot(data=data[anom_temp],x=date_col,y=val_col,hue=grp_col,palette='tab20',s=100,marker='x',legend=False,label='doubt')
g.yaxis.set_major_formatter('${:,.0f}'.format)
plt.title('anomaly_over_weeks')
plt.xlabel('week')
plt.ylabel('Amount')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
anomaly_sct(anomaly_encoded_df,'Transaction Date','Service Area','JV Value',treshold)
# Plot anomalies for each service area separately
for service_area, data in anomaly_encoded_df.groupby('Service Area'):
plt.figure(figsize=(10, 6))
sns.scatterplot(data=data, x='Transaction Date', y='JV Value', hue='Anomaly', palette='tab10', markers=['o', 'x'], s=100)
plt.title(service_area)
plt.xlabel('week')
plt.ylabel('Amount')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
# top 100 anomalies
top_anomalies = anomaly_encoded_df[anomaly_encoded_df['Anomaly']].nlargest(100, 'JV Value')
plt.figure(figsize=(10, 6))
sns.scatterplot(data=top_anomalies, x='Transaction Date', y='JV Value', hue='Service Area', palette='tab20', style='Anomaly', markers=['o', 'x'], s=100)
plt.title('Top 100 Overall Anomalies with Service Area')
plt.xlabel('Transaction Date')
plt.ylabel('Total Transaction Amount')
plt.legend(title='Service Area', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.grid(True)
plt.tight_layout()
plt.show()